- Covid Tracking Dataset (w/ hospitalised data)
- An Exploratory data analysis of the US dataset
- Basic triad of the dataset: validating data types and data integrity of each row
- Graphical Exploratory Analysis
- Analysis of Hospitalizations, ICU Hospitilizations, Active Cases, New Cases, Deaths, and Recoveries by State
- Since the normality of the independent variables is highly variable do to temporal and precision differences from each state, we further assess each state's data by viewing trends on the independent variables in both scatter and box-and-whisker plots.
- New York
- Alaska
- Alabama
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- New York
- Assessing Correlation of Independent Variables
- Build model for dependent Variable
Covid Tracking Dataset (w/ hospitalised data)
Source: https://covidtracking.com/ Latest data and plots: https://github.com/bielsnor/futureproof/tree/master/_notebooks/results
# TODO
all_cases['active'] = all_cases['positive'] - all_cases['recovered'] - all_cases['death']
# change location of 'active' column
cols = list(all_cases)
cols.insert(3, cols.pop(cols.index('active')))
all_cases = all_cases.loc[:, cols]
# Load datasets for US population and Hospital beds per 1000
us_population = pd.read_csv('data/us_population.csv')
hosp_beds = pd.read_csv('data/hospital_beds.csv')
state_abbrev = pd.read_csv('data/us_state_names.csv')
# add state abbreviations to us_population and hospital beds dataframe
for state in state_abbrev['State'].tolist():
# store state abbreviation in variable
abbrev = state_abbrev.loc[state_abbrev['State'] == state, 'Abbreviation'].tolist()[0]
# add abbrev to new column 'Abbreviation' in us_population df
us_population.loc[us_population['State'] == state, 'Abbreviation'] = abbrev
# add abbrev to new column in hosp_beds df
hosp_beds.loc[hosp_beds['Location'] == state, 'Abbreviation'] = abbrev
# change order of columns of us_population
cols = list(us_population)
cols.insert(2, cols.pop(cols.index('Abbreviation')))
us_population = us_population.loc[:, cols]
# drop unnecessary columns of us_population
us_population = us_population.drop(columns=['rank', 'Growth', 'Pop2018', 'Pop2010', 'growthSince2010', 'Percent', 'density'])
# drop unnecessary columns of hosp_beds
hosp_beds = hosp_beds.drop(columns=['Location', 'State/Local Government', 'Non-Profit', 'For-Profit'])
# change order of columns of hosp_beds
cols = list(hosp_beds)
cols.insert(0, cols.pop(cols.index('Abbreviation')))
hosp_beds = hosp_beds.loc[:, cols]
# filter out non-existing states like 'AS'
all_cases = all_cases[all_cases['state'].isin(state_abbrev['Abbreviation'].tolist())]
# Split dataframes by date
df_split_by_date = dict(tuple(all_cases.groupby('date')))
# Split dataframes by state
df_split_by_state = dict(tuple(all_cases.groupby('state')))
# merge dataframes us_population and all_cases
df_merge_uspop = all_cases.merge(us_population, how='left', left_on='state', right_on='Abbreviation')
df_merge_uspop = df_merge_uspop.drop(columns=['Abbreviation'])
df_merge_uspop = df_merge_uspop.rename(columns={'Pop': 'population'})
# change location of 'population' column
cols = list(df_merge_uspop)
cols.insert(2, cols.pop(cols.index('population')))
df_merge_uspop = df_merge_uspop.loc[:, cols]
# merge dataframes hosp_beds and df_merge_uspop
df_merge_hosp = df_merge_uspop.merge(hosp_beds, how='left', left_on='state', right_on='Abbreviation')
df_merge_hosp = df_merge_hosp.drop(columns=['Abbreviation'])
all_cases = df_merge_hosp.rename(columns={'Total': 'bedsPerThousand'})
# Calculate the total beds, and add the column
all_cases['total_beds'] = all_cases['population'] / 1000 * all_cases['bedsPerThousand']
# change abbreviations to state names
all_cases = all_cases.rename(columns={'state': 'abbrev'})
all_cases = all_cases.rename(columns={'State': 'state'})
# change location of 'state' column
cols = list(all_cases)
cols.insert(1, cols.pop(cols.index('state')))
all_cases = all_cases.loc[:, cols]
#print data frame head for spot visual data verification
all_cases
- Load and clean JHU data
- Merge JHU dataset with main dataset
# This cell takes some time, as it needs to connect to Kaggle Servers to retrieve data
kaggle.api.authenticate()
kaggle.api.dataset_download_files('benhamner/jhucovid19', path='./kaggle/input/jhucovid19/', unzip=True)
# Get Time-Series Data of cases as Pandas DataFrame
dir_jhu = './kaggle/input/jhucovid19/csse_covid_19_data/csse_covid_19_daily_reports'
df_list = []
for dirname, _, files in os.walk(dir_jhu):
for file in files:
if 'gitignore' not in file and 'README' not in file:
full_dir = os.path.join(dirname, file)
df_list.append(pd.read_csv(full_dir))
jhu_df = pd.concat(df_list, axis=0, ignore_index=True, sort=True)
# convert Last Update columns to datetime format
jhu_df.loc[:, 'Last Update'] = pd.to_datetime(jhu_df['Last Update']).apply(lambda x: x.date())
jhu_df.loc[:, 'Last_Update'] = pd.to_datetime(jhu_df['Last_Update']).apply(lambda x: x.date())
# Combine Last Update with Last_Update
jhu_df['LastUpdate'] = jhu_df['Last_Update'].combine_first(jhu_df['Last Update'])
# Combine Country/Region with Country_Region
jhu_df['CountryRegion'] = jhu_df['Country/Region'].combine_first(jhu_df['Country_Region'])
# Retrieve only US data
jhu_df = jhu_df[jhu_df['CountryRegion']=='US']
# Combine Province/State with Province_State
jhu_df['ProvinceState'] = jhu_df['Province/State'].combine_first(jhu_df['Province_State'])
# Drop unnecessary columns
jhu_df = jhu_df.drop(['Admin2', 'Lat', 'Latitude', 'Long_', 'Longitude', 'Combined_Key', 'Country/Region',
'Country_Region', 'Province/State', 'Province_State',
'Last Update', 'Last_Update', 'FIPS'], axis=1)
# Change column order
cols = list(jhu_df)
cols.insert(0, cols.pop(cols.index('CountryRegion')))
cols.insert(1, cols.pop(cols.index('ProvinceState')))
cols.insert(2, cols.pop(cols.index('LastUpdate')))
jhu_df = jhu_df.loc[:, cols]
# Change region to known US states
state_abbrs_dict = {}
for state in us.states.STATES:
state_abbrs_dict[state.abbr] = state.name
def toState(input_state, mapping):
abbreviation = input_state.rstrip()[-2:]
try:
return_value = mapping[abbreviation]
except KeyError:
return_value = input_state
return return_value
jhu_df['ProvinceState'] = jhu_df['ProvinceState'].apply(lambda x: toState(x, state_abbrs_dict) if x != 'Washington, D.C.' else 'District of Columbia')
# Filter out unknown states
jhu_df = jhu_df[jhu_df['ProvinceState'].isin(all_cases.state.unique().tolist())]
# Merge-sum rows with same date and State
jhu_df = jhu_df.groupby(['LastUpdate', 'ProvinceState']).agg(
{
'Active': sum,
'Confirmed': sum,
'Deaths': sum,
'Recovered': sum
}
).reset_index()
jhu_df.tail()
# Now that we have the JHU dataset relatively cleaned
# we can go ahead and merge its data with our main dataset
for i, row in all_cases.iterrows():
last_update = all_cases.at[i, 'date']
state = all_cases.at[i, 'state']
matching_row = jhu_df[jhu_df['ProvinceState'] == state]
matching_row = matching_row[matching_row['LastUpdate'] == last_update].reset_index()
if len(matching_row.values) > 0:
#all_cases.at[i, 'positive'] = matching_row['Confirmed'].values[0]
all_cases.at[i, 'active'] = matching_row['Active'].values[0]
#all_cases.at[i, 'recovered'] = matching_row['Recovered'].values[0] --- JHU was inconsistent, therefore removed
#all_cases.at[i, 'death'] = matching_row['Deaths'].values[0]
# Replace unknown recovery numbers with 0
if np.isnan(row['recovered']):
all_cases.at[i, 'recovered'] = 0
if all_cases.at[i, 'active'] == 0 or np.isnan(row['active']):
positive = all_cases.at[i, 'positive']
recovered = all_cases.at[i, 'recovered']
dead = all_cases.at[i, 'death']
all_cases.at[i, 'active'] = positive - recovered - dead
#all_cases.tail()
all_cases
# Save formatted dataset offline in case of disaster
dataset_file = 'results/all_cases.csv'
all_cases.to_csv(dataset_file)
# convert date to datetime format
all_cases['date'] = pd.to_datetime(all_cases['date'])
dataset_file = 'results/all_cases.csv'
covid_df = pd.read_csv(dataset_file, index_col=0)
# convert date to datetime format
covid_df['date'] = pd.to_datetime(covid_df['date'])
covid_df.info()
# set float format to 3 decimals
pd.set_option('display.float_format', lambda x: '%.3f' % x)
covid_df.head()
The NaN values may indicate that there were no to few Covid-19 patients at these date points. We further analyse the statistical values of the dataset columns to ensure data integrity and accuracy.
covid_df.describe()
# TODO rounding up the numbers
# drop unnecessary columns
covid_cleaned = covid_df.drop(['hospitalized', 'bedsPerThousand'], axis=1)
covid_100k = covid_cleaned.copy()
# list of columns to transform to per 100k
columns_list = ['positive', 'active', 'recovered', 'death', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'onVentilatorCurrently', 'total_beds', 'totalTestsViral']
# add columns per 100k
for column in columns_list:
if column == 'total_beds':
covid_100k['BedsPer100k'.format(column)] = (covid_cleaned.loc[:, column] / covid_cleaned.loc[:, 'population']) * 100000
else:
covid_100k['{}_100k'.format(column)] = (covid_cleaned.loc[:, column] / covid_cleaned.loc[:, 'population']) * 100000
covid_100k = covid_100k.drop(columns_list, axis=1)
covid_100k['date'] = pd.to_datetime(covid_100k['date'])
start_date = '2020-04-18'
end_date = '2020-05-19'
mask = (covid_100k['date'] > start_date) & (covid_100k['date'] <= end_date)
covid_100k_last_month = covid_100k.loc[mask]
covid_100k_last_month_part1 = covid_100k_last_month.groupby('date').sum().loc[:, ['positive_100k','active_100k','recovered_100k','death_100k','hospitalizedCumulative_100k']].diff(periods=1, axis=0)
covid_100k_last_month_part2 = covid_100k_last_month.groupby('date').sum().loc[:, ['inIcuCurrently_100k','onVentilatorCurrently_100k','BedsPer100k']]
final_100k_last_month = covid_100k_last_month_part1.merge(covid_100k_last_month_part2, left_index=True, right_index=True)
final_100k_last_month.head()
final_100k_last_month.describe()
# Timeseries plot
fig, ax = plt.subplots(figsize = (16, 10))
ax.plot(covid_df.date, covid_df.hospitalizedCurrently)
ax.set_title('Number of USA Patients Currently in ICU')
# TODO change the X-axis to chip the year
# TODO which timeseries plots do we need?
# TODO how to temporarily remove the outliers?
# save description cleaned dataset to csv
describe_file = 'results/final_100k_last_month.csv'
final_100k_last_month.describe().to_csv(describe_file)
# Omitting the categorical (states/abbreviations) and time columns
# There must be an easier way for you, but this was the easiest way I could think of
covid_cleaned['date'] = pd.to_datetime(covid_cleaned['date'])
# mask data for last month
start_date = '2020-04-18'
end_date = '2020-05-19'
mask = (covid_cleaned['date'] > start_date) & (covid_cleaned['date'] <= end_date)
covid_cleaned_last_month = covid_cleaned.loc[mask]
plot_df = covid_cleaned_last_month[['population', 'active', 'recovered', 'death', 'hospitalizedCurrently', 'inIcuCurrently', 'onVentilatorCurrently', 'total_beds']]
plot_df_last_month = covid_100k_last_month[['population', 'active_100k', 'recovered_100k', 'death_100k', 'hospitalizedCurrently_100k', 'inIcuCurrently_100k', 'onVentilatorCurrently_100k', 'BedsPer100k']]
timeseries_usa_df = covid_100k.loc[:, ['date', 'positive_100k', 'active_100k', 'recovered_100k', 'death_100k', 'hospitalizedCurrently_100k', 'inIcuCurrently_100k', 'onVentilatorCurrently_100k', 'BedsPer100k']].groupby('date').sum().reset_index()
# timeseries_usa_df['log_positive'] = np.log(timeseries_usa_df['positive_100k'])
# timeseries_usa_df['log_active'] = np.log(timeseries_usa_df['active_100k'])
# timeseries_usa_df['log_recovered'] = np.log(timeseries_usa_df['recovered_100k'])
# timeseries_usa_df['log_death'] = np.log(timeseries_usa_df['death_100k'])
timeseries_usa_df.tail()
covid_df.head()
# get data from last day
# plot_df_last_date = plot_df.loc[covid_df['date'] == '2020-05-18']
# Plotting histograms to gain insight of the distribution shape, skewness and scale
fig, axs = plt.subplots(4,2,figsize = (16, 16))
sns.set()
for i, column in enumerate(plot_df_last_month.columns):
if (i + 1) % 2 == 0:
ax = axs[(i//2), 1]
else:
ax = axs[(i//2), 0]
sns.distplot(plot_df_last_month[column], fit=norm, fit_kws=dict(label='normality'), hist_kws=dict(color='plum', edgecolor='k', linewidth=1, label='frequency'), ax=ax, color='#9d53ad')
ax.legend(loc='upper right')
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Looking at linearity and variance with scatterplots
# Removing the target variable and saving it in another df
target = plot_df.hospitalizedCurrently
indep_var = plot_df.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var.columns):
ax=fig.add_subplot(4, 3, i+1)
sns.regplot(x=indep_var[col], y=target, data=indep_var, label=col, scatter_kws={'s':10}, line_kws={"color": "plum", 'label': 'hospitCurr'})
plt.suptitle('Scatterplots with Target Hospitalized Patients Showing Growth Trajectories', fontsize=18)
plt.legend()
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Assessing the normality of the distribution with a boxplot
# Boxplot with removed outliers
fig, ax = plt.subplots(figsize = (16, 12))
for i, col in enumerate(plot_df.columns):
ax=fig.add_subplot(4, 3, i+1)
sns.boxplot(x=plot_df[col], data=plot_df, color='lightblue', showfliers=False)
plt.suptitle('Boxplots of Independent Variables', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# get data from last day
plot_df_last_date = plot_df.loc[covid_df['date'] == '2020-05-18']
fig, ax = plt.subplots(figsize = (16, 12))
for i, col in enumerate(plot_df_last_date.columns):
ax=fig.add_subplot(4, 3, i+1)
sns.boxplot(x=plot_df_last_date[col], data=plot_df, color='lightblue', showfliers=True)
plt.suptitle('Boxplots of Independent Variables', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
Analysis of Hospitalizations, ICU Hospitilizations, Active Cases, New Cases, Deaths, and Recoveries by State
Since the normality of the independent variables is highly variable do to temporal and precision differences from each state, we further assess each state's data by viewing trends on the independent variables in both scatter and box-and-whisker plots.
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in AK Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Patients in AK Currently on a Ventilator', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.recovered, linewidth=3.3)
plt.title('Number of Recovered Cases in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.totalTestsViral, linewidth=3.3)
plt.title('Number of Cumulative Deaths in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.death, linewidth=3.3)
plt.title('Number of Cumulative Viral Tests in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
###ENDALASKA
# Split covid_df into subset with only NY values
alabama = covid_df.loc[covid_df['abbrev'] == 'AL']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alabama.date, alabama.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in AL Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
alabama = covid_df.loc[covid_df['abbrev'] == 'AL']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alabama.date, alabama.hospitalized, linewidth=3.3)
plt.title('Number of Cummulative Hospitalizations in AL', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
alabama = covid_df.loc[covid_df['abbrev'] == 'AL']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alabama.date, alabama.positiveCasesViral, linewidth=3.3)
plt.title('Number of Positve Cases in AL', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
alabama = covid_df.loc[covid_df['abbrev'] == 'AL']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alabama.date, alabama.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in AL', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY']
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Omit the categorical and date cols
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
ax=fig.add_subplot(3, 3, i+1)
sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK
# TODO add some explanation / look more into collinear variables
# Heatmap of correlations
# Save correlations to variable
corr = covid_cleaned.corr(method='pearson')
# We can create a mask to not show duplicate values
mask = np.triu(np.ones_like(corr, dtype=np.bool))
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(16,16))
# Generate heatmap
sns.heatmap(corr, annot=True, mask=mask, cmap='GnBu', center=0,
square=True, linewidths=.5, cbar_kws={"shrink": .5})
# We compare three models:
# - Polynomial Regression
# - Linear Regression
# - ElasticNet
# Copy DFs to not mess up original one
# We will use model_df for our regression model
model_df = all_cases.copy()
# Delete redundant rows
for row in ['abbrev', 'bedsPerThousand', 'hospitalized',
'state', 'hospitalizedCumulative', 'dataQualityGrade', 'lastUpdateEt']:
del model_df[row]
# Drop NaN values for hospitalizedCurrently
model_df = model_df.dropna(subset=['hospitalizedCurrently'])
# Drop Values with abnormal active-hospitalised ratios (outside Conf. Interval)
model_df['ratio_hospital'] = model_df['hospitalizedCurrently'] / model_df['active']
model_df = model_df[~(model_df['ratio_hospital'] >= model_df.ratio_hospital.quantile(0.99))]
#model_df = model_df[~(model_df['ratio_hospital'] <= model_df['ratio_hospital'].median())]
del model_df['ratio_hospital']
# Get peek of model to use
model_df.describe()
#Printing data for visual verification.
all_cases
### Mark Bee (https://www.facebook.com/markbeenyc) - do you need a sippy cup lesson on this information?